# main.R
# Yimeng Li
# This script produces results and figures reported in Section 3, A.2, and A.3 of
#   "Relaxing the No Liars Assumption in List Experiment Analyses".
# This version: Dec 20, 2018.

# Set working directory:
# setwd("")

# Load libraries:
library(bindata) # required by sim_bounds.R
library(dplyr)
library(forcats)
library(ggplot2)
library(list)
library(nleqslv) # required by list_relaxed_liars.R
library(purrr)
library(tidyr)

source("list_relaxed_liars.R")
source("list_types_plot.R")
source("sim_bounds.R")

load("listExperiments.Rda")

options(digits = 3)

#*******************************************
# Section 3.1  Illustrative Example
#*******************************************

# Retrieve the distribution of answers under control and under treatment:
dist_control_Example <- listExperiments$AALL1$dist_control
dist_treated_X_Example <- listExperiments$AALL1$dist_treated
dist_treated_Y_Example <- listExperiments$AALL2$dist_treated

# Table A3:
TableA3 <- rbind(c("", 0, 1, 2, 3, 4, 5),
                 c("Control", round(dist_control_Example/sum(dist_control_Example), 2), ""),
                 c("X-treatment", round(dist_treated_X_Example/sum(dist_treated_X_Example), 2)),
                 c("Y-treatment", round(dist_treated_Y_Example/sum(dist_treated_Y_Example), 2)))
write.table(TableA3, file = "TableA3.csv", sep = ",", row.names = FALSE, col.names = FALSE, qmethod = "double")

# Function create_list_df
# creates a dataframe with response data and treatment indicator
# from the distribution of answers under control and under treatment
# to be used as inputs for function "ictreg" and "ict.test" in R package "list"
create_list_df <- function(dist_control, dist_treated, J) {
  y <- vector()
  for (i in 0:J) { y <- c(y, rep(i, dist_control[i+1])) }
  for (i in 0:(J+1)) { y <- c(y, rep(i, dist_treated[i+1])) }
  treat <- c(rep(0, sum(dist_control)), rep(1, sum(dist_treated)))
  list_df <- data.frame(y = y, treat = treat)
}

# Compute the difference-in-means estimates and 95% CI using function "ictreg" from R package "list":
DiM_X_Example <- ictreg(y ~ 1, data = create_list_df(dist_control_Example, dist_treated_X_Example, J = 4), treat = "treat", J = 4, method = "lm")
DiM_Y_Example <- ictreg(y ~ 1, data = create_list_df(dist_control_Example, dist_treated_Y_Example, J = 4), treat = "treat", J = 4, method = "lm")

# Difference-in-Means Estimates and 95% CI:
unname(DiM_X_Example$par.treat)
unname(c(DiM_X_Example$par.treat+qnorm(0.025)*DiM_X_Example$se.treat, DiM_X_Example$par.treat+qnorm(0.975)*DiM_X_Example$se.treat))
unname(DiM_Y_Example$par.treat)
unname(c(DiM_Y_Example$par.treat+qnorm(0.025)*DiM_Y_Example$se.treat, DiM_Y_Example$par.treat+qnorm(0.975)*DiM_Y_Example$se.treat))

# Compute bounds estimates and 95% confidence set using "list_relaxed_liars" function:
bounds_X_Example <- list_relaxed_liars(dist_control_Example, dist_treated_X_Example, J = 4, affirmative = TRUE)
bounds_Y_Example <- list_relaxed_liars(dist_control_Example, dist_treated_Y_Example, J = 4, affirmative = TRUE)

# Bound Estimates and 95% Confidence Set:
bounds_X_Example$bounds
bounds_X_Example$CI
bounds_Y_Example$bounds
bounds_Y_Example$CI

# Generate Figure 1 using "list_types_plot" function:
Figure1 <- list_types_plot(bounds_X_Example)

# Figure 1:
ggsave("./Figure1-Left.png", Figure1$plot_two_types, width = 4, height = 4, units = "in", dpi = 600)
ggsave("./Figure1-Right.png", Figure1$plot_three_types, width = 4, height = 4, units = "in", dpi = 600)

# List Experiments in Published Studies:
listExperiments <- listExperiments %>% discard(names(.) %in% c("AALL1", "AALL2"))

#*********************************************************
# Section 3.2  List Experiments in Published Studies
#*********************************************************

# Run Blair-Imai test of no design effect and exclude list experiments that fail the test from subsequent analysis.
append_test_p_value <- function(study) {
  study_df <- create_list_df(study$dist_control, study$dist_treated, study$J)
  study <- append(study, list(test_p_value = ict.test(study_df$y, study_df$treat, study$J, alpha = 0.05)$p))
}
listExperiments <- map(listExperiments, append_test_p_value)
listExperiments <- keep(listExperiments, function(study) { (study$test_p_value > 0.05) })

# Create an indicator for list experiments with < 50 respondents choosing all items (or zero item for list experiments with negative sensitive responses).
# For these list experiments, combine the cases of all control items and all but one control items. See Section 2.5.2 and footnote 8.
append_combine <- function(study) {
  if (study$affirmative == TRUE) { combine = ifelse(tail(study$dist_control, n=1) + tail(study$dist_treated, n=1) < 50, TRUE, FALSE)
  } else{ combine = ifelse(head(study$dist_control, n=1) + head(study$dist_treated, n=1) < 50, TRUE, FALSE) }
  if (combine == TRUE) { study$title <- paste0(study$title, "*") }
  study <- append(study, list(combine = combine))
}
listExperiments <- map(listExperiments, append_combine)

# Obtain the estimated proportions of different types of respondents, the maximal liar ratio, and the bounds estimates for list experiments in published studies:
bounds_listExps <- map(listExperiments, function(study) { list_relaxed_liars(study$dist_control, study$dist_treated, study$J, study$affirmative, study$combine, CI = FALSE) })
listExperiments <- map2(listExperiments, bounds_listExps, append)

# Transform bounds estimates contained in a list to a dataframe and group list experiments according to the number of control items (J)
# and whether an affirmative or negative response to the sensitive item is considered sensitive ("affirmative"):
bounds_listExps_df <- data.frame(
  "title" = map_chr(listExperiments, "title"),
  "J" = map_dbl(listExperiments, "J"),
  "affirmative" = map_lgl(listExperiments, "affirmative"),
  "lower_bound" = map_dbl(bounds_listExps, function(study) study$bounds[1]),
  "upper_bound" = map_dbl(bounds_listExps, function(study) study$bounds[2]),
  "maximal_liar_ratio" = map_dbl(bounds_listExps, "lambda")
) %>% mutate(
  group = case_when(J == 3 & affirmative == TRUE ~ "J = 3, sensitive = yes",
                    J == 4 & affirmative == TRUE ~ "J = 4, sensitive = yes",
                    J == 3 & affirmative == FALSE ~ "J = 3, sensitive = no",
                    J == 4 & affirmative == FALSE ~ "J = 4, sensitive = no"),
  group = factor(group, levels = c("J = 3, sensitive = yes", "J = 4, sensitive = yes",
                                   "J = 3, sensitive = no", "J = 4, sensitive = no"))
  )

# Figure 2:
Figure2 <- ggplot(bounds_listExps_df, aes(x=fct_reorder(title, maximal_liar_ratio), y=maximal_liar_ratio)) +
  geom_errorbar(aes(ymin = lower_bound, ymax = upper_bound), width = 0.2) +
  facet_grid(group ~ ., scales="free") +
  labs(x = "Author(s), year of publication", y = "Bounds Estimates") +
  theme_bw() +
  theme(panel.border = element_rect(colour = "black", fill=NA, size=1), panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) +
  theme(text = element_text(size=10))+
  coord_flip()

ggsave("./Figure2.png", Figure2, width = 8, height = 5.4, units = "in", dpi = 600)

# Transform the estimated proportions of different types of respondents contained in a list to a dataframe:
props_listExps_df <- data.frame(
  "title" = map_chr(listExperiments, "title"),
  "J" = map_dbl(listExperiments, "J"),
  "affirmative" = map_lgl(listExperiments, "affirmative"),
  zero = map_dbl(listExperiments, function(study) { study$id_props$pt[1]/(study$id_props$pt[1]+study$id_props$pnl[1]) }),
  one = map_dbl(listExperiments, function(study) { study$id_props$pt[2]/(study$id_props$pt[2]+study$id_props$pnl[2]) }),
  two = map_dbl(listExperiments, function(study) { study$id_props$pt[3]/(study$id_props$pt[3]+study$id_props$pnl[3]) }),
  three = map_dbl(listExperiments, function(study) { study$id_props$pt[4]/(study$id_props$pt[4]+study$id_props$pnl[4]) }),
  four = map_dbl(listExperiments, function(study) { study$id_props$pt[5]/(study$id_props$pt[5]+study$id_props$pnl[5]) })
) %>%
  gather("zero", "one", "two", "three", "four", key = "num_control", value = "cond.pt") %>%
  mutate(num_control = case_when(num_control == "zero" ~ 0, num_control == "one" ~ 1, num_control == "two" ~ 2,
                                 num_control == "three" ~ 3, num_control ==  "four" ~ 4, TRUE ~ NA_real_),
         cond.pt = pmin(1.1, pmax(-0.1, cond.pt)))

# Figure A1 and A2:
FigureA1_1 <-
  ggplot(data=props_listExps_df %>% filter(J == 3 & affirmative == TRUE & num_control != 4),
         aes(x=num_control, y=cond.pt, group=title, shape=title)) +
  geom_line() +
  geom_point() +
  scale_y_continuous(breaks = c(0, 0.25, 0.5, 0.75, 1), limits=c(-0.1, 1.1)) +
  labs(x = "Number of Control Items Answered Affirmatively", y = "Proportion of Truth-tellers with Sensitive Behavior/Attitude", shape = "Author(s), year of publication") +
  theme_bw() +
  theme(panel.border = element_rect(colour = "black", fill=NA, size=1), panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) +
  theme(text = element_text(size=10))

FigureA1_2 <-
  ggplot(data=props_listExps_df %>% filter(J == 4 & affirmative == TRUE),
         aes(x=num_control, y=cond.pt, group=title, shape=title)) +
  geom_line() +
  geom_point() +
  scale_y_continuous(breaks = c(0, 0.25, 0.5, 0.75, 1), limits=c(-0.1, 1.1)) +
  labs(x = "Number of Control Items Answered Affirmatively", y = "Proportion of Truth-tellers with Sensitive Behavior/Attitude", shape = "Author(s), year of publication") +
  theme_bw() +
  theme(panel.border = element_rect(colour = "black", fill=NA, size=1), panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) +
  theme(text = element_text(size=10))

FigureA2_1 <-
  ggplot(data=props_listExps_df %>% filter(J == 3 & affirmative == FALSE & num_control != 4),
         aes(x=3-num_control, y=cond.pt, group=title, shape=title)) +
  geom_line() +
  geom_point() +
  scale_y_continuous(breaks = c(0, 0.25, 0.5, 0.75, 1), limits=c(-0.1, 1.1)) +
  labs(x = "Number of Control Items Answered Negatively", y = "Proportion of Truth-tellers with Sensitive Behavior/Attitude", shape = "Author(s), year of publication") +
  theme_bw() +
  theme(panel.border = element_rect(colour = "black", fill=NA, size=1), panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) +
  theme(text = element_text(size=10))

FigureA2_2 <-
  ggplot(data=props_listExps_df %>% filter(J == 4 & affirmative == FALSE),
         aes(x=4-num_control, y=cond.pt, group=title, shape=title)) +
  geom_line() +
  geom_point() +
  scale_y_continuous(breaks = c(0, 0.25, 0.5, 0.75, 1), limits=c(-0.1, 1.1)) +
  labs(x = "Number of Control Items Answered Negatively", y = "Proportion of Truth-tellers with Sensitive Behavior/Attitude", shape = "Author(s), year of publication") +
  theme_bw() +
  theme(panel.border = element_rect(colour = "black", fill=NA, size=1), panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) +
  theme(text = element_text(size=10))

ggsave("./FigureA1-Top.png", FigureA1_1, width = 8, height = 4, units = "in", dpi = 600)
ggsave("./FigureA1-Bottom.png", FigureA1_2, width = 8, height = 4, units = "in", dpi = 600)
ggsave("./FigureA2-Top.png", FigureA2_1, width = 8, height = 4, units = "in", dpi = 600)
ggsave("./FigureA2-Bottom.png", FigureA2_2, width = 8, height = 4, units = "in", dpi = 600)
  
#*********************************************************
# Section 3.3  Simulated List Experiments
#*********************************************************

# Simulation 1.1 ("Independent List"): all independent, with marginal probability (1/6, 1/2, 2/3, 2/3, 0.62)

probvec.1.1 <- c(1/6, 1/2, 2/3, 2/3, 0.62)

corrmat.1.1 <- cbind(c(1,0,0,0,0),
                     c(0,1,0,0,0),
                     c(0,0,1,0,0),
                     c(0,0,0,1,0),
                     c(0,0,0,0,1))

bounds.1.1.no <- sim_bounds(probvec.1.1, corrmat.1.1, J = 4, affirmative = TRUE, seed=1000, liardist="no liars", listtype="Independent")
bounds.1.1.const <- sim_bounds(probvec.1.1, corrmat.1.1, J = 4, affirmative = TRUE, seed=1001, liardist="constant liars", listtype="Independent")
bounds.1.1.nonconst <- sim_bounds(probvec.1.1, corrmat.1.1, J = 4, affirmative = TRUE, seed=1002, liardist="nonconstant liars", listtype="Independent")

# Simulation 1.2 ("Designed List"): first two items correlated with cor=-0.6; marginal probability (0.5, 0.5, 0.15, 0.85, 0.62)

probvec.1.2 <- c(0.5, 0.5, 0.15, 0.85, 0.62)

corrmat.1.2 <- cbind(c(1,-0.6,0,0,0),
                     c(-0.6,1,0,0,0),
                     c(0,0,1,0,0),
                     c(0,0,0,1,0),
                     c(0,0,0,0,1))

bounds.1.2.no <- sim_bounds(probvec.1.2, corrmat.1.2, J = 4, affirmative = TRUE, seed=1003, liardist="no liars", listtype="Designed")
bounds.1.2.const <- sim_bounds(probvec.1.2, corrmat.1.2, J = 4, affirmative = TRUE, seed=1004, liardist="constant liars", listtype="Designed")
bounds.1.2.nonconst <- sim_bounds(probvec.1.2, corrmat.1.2, J = 4, affirmative = TRUE, seed=1005, liardist="nonconstant liars", listtype="Designed")

# Simulation 1.3 ("Correlated List"): Add 0.1 correlation to every pair of items to Simulation 1.1

probvec.1.3 <- c(1/6, 1/2, 2/3, 2/3, 0.62)

corrmat.1.3 <- cbind(c(1,0.1,0.1,0.1,0.1),
                     c(0.1,1,0.1,0.1,0.1),
                     c(0.1,0.1,1,0.1,0.1),
                     c(0.1,0.1,0.1,1,0.1),
                     c(0.1,0.1,0.1,0.1,1))

bounds.1.3.no <- sim_bounds(probvec.1.3, corrmat.1.3, J = 4, affirmative = TRUE, seed=1006, liardist="no liars", listtype="Correlated")
bounds.1.3.const <- sim_bounds(probvec.1.3, corrmat.1.3, J = 4, affirmative = TRUE, seed=1007, liardist="constant liars", listtype="Correlated")
bounds.1.3.nonconst <- sim_bounds(probvec.1.3, corrmat.1.3, J = 4, affirmative = TRUE, seed=1008, liardist="nonconstant liars", listtype="Correlated")

# Simulation 1.4 ("Correlated Designed List"): Add 0.1 correlation to every pair of items to Simulation 1.2

probvec.1.4 <- c(0.5, 0.5, 0.15, 0.85, 0.62)

corrmat.1.4 <- cbind(c(1,-0.5,0.1,0.1,0.1),
                     c(-0.5,1,0.1,0.1,0.1),
                     c(0.1,0.1,1,0.1,0.1),
                     c(0.1,0.1,0.1,1,0.1),
                     c(0.1,0.1,0.1,0.1,1))

bounds.1.4.no <- sim_bounds(probvec.1.4, corrmat.1.4, J = 4, affirmative = TRUE, seed=1009, liardist="no liars", listtype="Corr. Designed")
bounds.1.4.const <- sim_bounds(probvec.1.4, corrmat.1.4, J = 4, affirmative = TRUE, seed=1010, liardist="constant liars", listtype="Corr. Designed")
bounds.1.4.nonconst <- sim_bounds(probvec.1.4, corrmat.1.4, J = 4, affirmative = TRUE, seed=1011, liardist="nonconstant liars", listtype="Corr. Designed")

# Organize the bounds estimates for 12 types of simulated list experiments:
bounds.Figure3 <-
  rbind(bounds.1.1.no, bounds.1.1.const, bounds.1.1.nonconst,
        bounds.1.2.no, bounds.1.2.const, bounds.1.2.nonconst,
        bounds.1.3.no, bounds.1.3.const, bounds.1.3.nonconst,
        bounds.1.4.no, bounds.1.4.const, bounds.1.4.nonconst) %>%
  mutate(liardist = factor(liardist, levels = c("nonconstant liars", "constant liars", "no liars")),
         listtype = factor(listtype, levels = c("Independent", "Designed", "Correlated", "Corr. Designed"))) %>%
  gather(key="bound_type", value="bound_value", -c("listtype", "liardist"))

# Figure 3:
Figure3 <- ggplot(data=bounds.Figure3, aes(x=liardist, y=bound_value, fill = bound_type)) +
  geom_violin(scale = "area", position=position_dodge(0.5)) +
  stat_summary(fun.y=median, na.rm = TRUE, geom="point", size=2, color="black") +
  geom_hline(yintercept = 0.62, linetype = "dashed") +
  facet_grid(listtype ~ ., scales="free") +
  labs(x = "Liar Distribution", y = "Bounds Estimates", fill = "") +
  scale_y_continuous(limits = c(-0.1, 1.1), breaks = c(0, 0.25, 0.5, 0.62, 0.75, 1)) +
  theme_bw() +
  theme(panel.border = element_rect(colour = "black", fill=NA, size=1), panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) +
  theme(text = element_text(size=12)) +
  coord_flip() +
  scale_fill_manual(values=c("#CCCCCC", "#999999"))

ggsave("./Figure3.png", Figure3, width = 8, height = 6.4, units = "in", dpi = 600)
